import numpy as np
import pandas as pd
# sklearn
from sklearn import metrics
from sklearn.model_selection import train_test_split, GridSearchCV, RandomizedSearchCV, cross_val_score, KFold
from sklearn.preprocessing import LabelBinarizer, LabelEncoder
# Tensorflow
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import re
import us
import pgeocode
# Visualisation libraries
## Text
from colorama import Fore, Back, Style
from IPython.display import Image, display, Markdown, Latex, clear_output
## plotly
from plotly.offline import init_notebook_mode, iplot
import plotly.graph_objs as go
import plotly.offline as py
from plotly.subplots import make_subplots
import plotly.express as px
%matplotlib inline
import warnings
warnings.filterwarnings("ignore")
In this article, we use the Consumer Complaint Database published by the Consumer Financial Protection Bureau.
According to the Kaggle page of the Consumer Financial Protection Bureau (CFPB), each week the CFPB sends thousands of consumers’ complaints about financial products and services to companies for response. Those complaints are published here after the company responds or after 15 days, whichever comes first. By adding their voice, consumers help improve the financial marketplace.
# Load the complaints CSV into the notebook-wide DataFrame `Data`
# and preview its first three rows.
complaints_csv = 'ccdatabase/complaints.csv'
Data = pd.read_csv(complaints_csv)
Data.head(n=3)
| | Date received | Product | Sub-product | Issue | Sub-issue | Consumer complaint narrative | Company public response | Company | State | ZIP code | Tags | Consumer consent provided? | Submitted via | Date sent to company | Company response to consumer | Timely response? | Consumer disputed? | Complaint ID |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 2019-09-24 | Debt collection | I do not know | Attempts to collect debt not owed | Debt is not yours | transworld systems inc. \nis trying to collect... | NaN | TRANSWORLD SYSTEMS INC | FL | 335XX | NaN | Consent provided | Web | 2019-09-24 | Closed with explanation | Yes | NaN | 3384392 |
| 1 | 2019-09-19 | Credit reporting, credit repair services, or o... | Credit reporting | Incorrect information on your report | Information belongs to someone else | NaN | Company has responded to the consumer and the ... | Experian Information Solutions Inc. | PA | 15206 | NaN | Consent not provided | Web | 2019-09-20 | Closed with non-monetary relief | Yes | NaN | 3379500 |
| 2 | 2019-11-08 | Debt collection | I do not know | Communication tactics | Frequent or repeated calls | Over the past 2 weeks, I have been receiving e... | NaN | Diversified Consultants, Inc. | NC | 275XX | NaN | Consent provided | Web | 2019-11-08 | Closed with explanation | Yes | NaN | 3433198 |
# Complaints per 'Date received' value, drawn as a line chart with a range
# slider and quick range-selector buttons.
Feat = 'Date received'

# Count occurrences per value, ordered by the value itself. The columns are
# named explicitly (rename_axis / reset_index(name=...)) rather than relying
# on reset_index() producing a column called 'index'.
Group = (
    Data[Feat]
    .value_counts()
    .sort_index()
    .rename_axis(Feat)
    .reset_index(name='Count')
)

fig = px.line(Group, x=Feat, y='Count', color_discrete_sequence=['tomato'])
fig.update_layout(xaxis_title=Feat.title())

# Range slider plus one-month / six-month / year-to-date / all buttons.
range_buttons = [
    dict(count=1, label='One Month', step='month', stepmode='todate'),
    dict(count=6, label='Six Months', step='month', stepmode='todate'),
    dict(count=1, label='This Year', step='year', stepmode='todate'),
    dict(step='all'),
]
fig.update_xaxes(
    rangeslider_visible=True,
    rangeslider=dict(bgcolor='WhiteSmoke'),
    rangeselector=dict(bgcolor='WhiteSmoke', buttons=range_buttons),
)

# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(
    plot_bgcolor='white',
    height=550,
    title={
        'text': '<b>Consumer Complaints by %s</b>' % Feat.title(),
        'x': .5, 'y': .98, 'xanchor': 'center', 'yanchor': 'top',
    },
)
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=False, gridwidth=1, gridcolor='Lightgray')
# NOTE: the y-axis range is fixed at 0-4000; counts above that are cut off.
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray', range=[0, 4e3])
fig.show()
As can be seen, the number of complaints has been increasing overall.
# Share of complaints per product, drawn as a horizontal bar chart.
# Bar length is the percentage of all complaints; the bar label is the count.
Feat = 'Product'

# Count occurrences per product, ordered by product name. Columns are named
# explicitly instead of depending on reset_index()'s default 'index' name.
Group = (
    Data[Feat]
    .value_counts()
    .sort_index()
    .rename_axis(Feat)
    .reset_index(name='Count')
)
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 3)

fig = px.bar(Group, y=Feat, x='Percentage', orientation='h', text='Count')
fig.update_traces(marker_color='orange', marker_line_color='DarkRed',
                  marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=600)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
# NOTE: the x-axis range is fixed at 0-40 percent.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 range=[0, 40])
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(title={'text': '<b>Consumer Complaints by %s</b>' % Feat.title(),
                         'x': .5, 'y': .96, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Complaints per state, drawn as a choropleth map of the USA.
Feat = 'State'

# Count occurrences per state code, ordered by code. Columns are named
# explicitly instead of depending on reset_index()'s default 'index' name.
# `Group` (columns: State, Count) is reused by the cells that follow.
Group = (
    Data[Feat]
    .value_counts()
    .sort_index()
    .rename_axis(Feat)
    .reset_index(name='Count')
)

state_map = go.Choropleth(
    locations=Group[Feat],
    z=Group['Count'].astype(float),
    locationmode='USA-states',
    colorscale='Reds',
    colorbar_title='Number of<br>Consumer<br>Complaints',
)
fig = go.Figure(data=state_map)
fig.update_layout(geo_scope='usa')
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(title={'text': '<b>Consumer Complaints by %s</b>' % Feat.title(),
                         'x': .5, 'y': .92, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Share of complaints per state, drawn as a vertical bar chart.
# Expects `Group` (columns: State, Count) and `Feat` == 'State' from the
# preceding choropleth cell.

# Translate abbreviations to full names. Only the State column is touched;
# the replacement used to be applied to every column of the frame.
Group[Feat] = Group[Feat].replace(us.states.mapping('abbr', 'name'))

# Codes left unmapped above are pooled under a single label.
other_codes = ['AA', 'AE', 'AP', 'FM', 'MH', 'PW',
               'UNITED STATES MINOR OUTLYING ISLANDS']
Group.loc[Group[Feat].isin(other_codes), Feat] = 'Other States'

# Collapse the relabelled rows into ONE 'Other States' row. Previously each
# pooled code kept its own row, so the chart drew several bars/labels for the
# same category.
Group = Group.groupby(Feat)['Count'].sum().reset_index()

# Percentages are computed after aggregation; the grand total is unchanged,
# so per-state values are the same as before.
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 3)
Group = Group.sort_values(by=Feat)

fig = px.bar(Group, x=Feat, y='Percentage', text='Count')
fig.update_traces(marker_color='DarkOrchid', marker_line_color='Indigo',
                  marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=600)
fig.update_layout(plot_bgcolor='white')
fig.update_yaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# NOTE: the y-axis range is fixed at 0-14 percent.
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 range=[0, 14])
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(title={'text': '<b>Consumer Complaints by %s</b>' % Feat.title(),
                         'x': .5, 'y': .96, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
The top ten states with the highest numbers of complaints can be found in the following table.
# Styled table of the ten states with the largest share of complaints.
# Expects `Group` to carry a 'Percentage' column from the preceding cell.
top_states = Group.sort_values(by=['Percentage'], ascending=False)[:10]
styled = top_states.style

# Styler.hide_index() was deprecated in pandas 1.4 and removed in 2.0 in
# favour of Styler.hide(axis='index'); use whichever this pandas provides.
if hasattr(styled, 'hide'):
    styled = styled.hide(axis='index')
else:
    styled = styled.hide_index()

styled = styled.background_gradient(cmap='Reds', subset=['Percentage'])

# Styler.set_precision() was removed as well. An explicit format string gives
# the same four-decimal rendering of the float column on old and new pandas.
styled = styled.format({'Percentage': '{:.4f}'})

display(styled)
| State | Count | Percentage |
|---|---|---|
| California | 269610 | 13.3170 |
| Florida | 223986 | 11.0630 |
| Texas | 189057 | 9.3380 |
| New York | 139319 | 6.8810 |
| Georgia | 114825 | 5.6720 |
| Pennsylvania | 79327 | 3.9180 |
| Illinois | 76352 | 3.7710 |
| New Jersey | 72989 | 3.6050 |
| North Carolina | 61774 | 3.0510 |
| Ohio | 57780 | 2.8540 |
# Share of complaints for the `Top` most-complained-about companies, drawn as
# a horizontal bar chart. Percentages are relative to ALL complaints, not
# just the displayed subset.
Top = 20
Feat = 'Company'

# Count occurrences per company. Columns are named explicitly instead of
# depending on reset_index()'s default 'index' name.
Group = (
    Data[Feat]
    .value_counts()
    .sort_index()
    .rename_axis(Feat)
    .reset_index(name='Count')
)
# Compute the share before truncating so the denominator is the full total.
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 2)
Group = Group.sort_values(by=['Count'], ascending=False)[:Top]
# (A bare `Group` expression used to sit here; mid-cell it had no effect.)

fig = px.bar(Group, y=Feat, x='Percentage', orientation='h', text='Count')
fig.update_traces(marker_color='lime', marker_line_color='DarkGreen',
                  marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=600)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
# NOTE: the x-axis range is fixed at 0-14 percent.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 range=[0, 14])
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(title={'text': '<b>Consumer Complaints by %s (Top %i)</b>' % (Feat.title(), Top),
                         'x': .5, 'y': .96, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Share of complaints for the `Top` most frequent issues, drawn as a
# horizontal bar chart. Percentages are relative to ALL complaints, not just
# the displayed subset.
Top = 20
Feat = 'Issue'

# Count occurrences per issue. Columns are named explicitly instead of
# depending on reset_index()'s default 'index' name.
Group = (
    Data[Feat]
    .value_counts()
    .sort_index()
    .rename_axis(Feat)
    .reset_index(name='Count')
)
# Compute the share before truncating so the denominator is the full total.
Group['Percentage'] = np.round(100 * (Group['Count'] / Group['Count'].sum()), 2)
Group = Group.sort_values(by=['Count'], ascending=False)[:Top]
# (A bare `Group` expression used to sit here; mid-cell it had no effect.)

fig = px.bar(Group, y=Feat, x='Percentage', orientation='h', text='Count')
fig.update_traces(marker_color='lightyellow', marker_line_color='Gold',
                  marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2s}', textposition='outside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide', height=600)
fig.update_layout(plot_bgcolor='white')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
# NOTE: the x-axis range is fixed at 0-25 percent.
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 range=[0, 25])
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(title={'text': '<b>Consumer Complaints by %s (Top %i)</b>' % (Feat.title(), Top),
                         'x': .5, 'y': .96, 'xanchor': 'center', 'yanchor': 'top'})
fig.show()
# Distribution of the delay, in days, between a complaint being received and
# being sent to the company, drawn as a line chart with a range slider.

# Side effect kept from the original: both date columns of `Data` are
# converted to datetime in place.
Data['Date received'] = pd.to_datetime(Data['Date received'])
Data['Date sent to company'] = pd.to_datetime(Data['Date sent to company'])

# Whole-day delay per complaint. Rows with a missing date are dropped, as
# value_counts() did implicitly before.
delay_days = (Data['Date sent to company'] - Data['Date received']).dt.days
delay_days = delay_days.dropna().astype(int)

# Clamp negative delays to 0 BEFORE counting. Clamping used to happen after
# counting, which left several separate rows with Days == 0 and made the line
# chart plot multiple points at x = 0 instead of one combined count.
delay_days = delay_days.clip(lower=0)

Group = (
    delay_days
    .value_counts()
    .sort_index()
    .rename_axis('Days')
    .reset_index(name='Count')
)

fig = px.line(Group, x='Days', y='Count', color_discrete_sequence=['royalblue'])
fig.update_xaxes(rangeslider_visible=True, rangeslider=dict(bgcolor='WhiteSmoke'))
# The bold tag is now properly closed with </b> (it was '<b>' on both sides).
fig.update_layout(plot_bgcolor='white', height=550,
                  title={'text': '<b>Consumer Complaint Sent to Company</b>',
                         'x': .5, 'y': .98, 'xanchor': 'center', 'yanchor': 'top'})
fig.update_xaxes(title='Days After Complain Received',
                 showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=False, gridwidth=1, gridcolor='Lightgray')
# NOTE: the y-axis range is fixed at 0-1.6e6.
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True,
                 showgrid=True, gridwidth=1, gridcolor='Lightgray', range=[0, 1.6e6])
fig.show()
# Breakdown of complaints by company response, timeliness and whether the
# consumer disputed the outcome, drawn as a horizontal bar chart coloured by
# 'Consumer disputed?'.
Temp = ['Company response to consumer', 'Timely response?', 'Consumer disputed?']

# One row per observed combination of the three columns with its row count.
# groupby() skips rows where any key is missing, so size() matches the
# previous count of a key column.
Group = Data.groupby(Temp).size().reset_index(name='Count')

# Build the bar label in one step. The column used to be created as float NaN
# and then filled with strings, a dtype conflict newer pandas warns about.
# Values other than 'Yes'/'No' still end up with a missing label.
timeliness_suffix = Group['Timely response?'].map({
    'Yes': ' (Timely response)',
    'No': ' (Not Timely response)',
})
Group['Label'] = Group['Company response to consumer'] + timeliness_suffix
Group['Percentage'] = np.round(100 * Group['Count'] / Group['Count'].sum(), 2)

C = ['aquamarine', 'steelblue']
SC = 'Navy'
fig = px.bar(Group, x='Percentage', y='Label', orientation='h',
             color='Consumer disputed?', text='Percentage',
             color_discrete_sequence=C, height=500)
fig.update_traces(marker_line_color=SC, marker_line_width=1.5, opacity=1)
fig.update_traces(texttemplate='%{text:.2}', textposition='inside')
fig.update_layout(uniformtext_minsize=8, uniformtext_mode='hide')
# NOTE: the x-axis range is fixed at 0-80 percent.
fig.update_xaxes(range=[0, 80])
fig.update_layout(title="""Customers' Complains by Companies""", plot_bgcolor='white')
fig.update_xaxes(showgrid=True, gridwidth=1, gridcolor='Lightgray')
fig.update_xaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_yaxes(showline=True, linewidth=1, linecolor='Lightgray', mirror=True)
fig.update_layout(yaxis_title='Company Response to Consumer')
fig.show()